package net.fortytwo.twitlogic.persistence; import edu.rpi.tw.twctwit.query.RelatedHashtagsResource; import edu.rpi.tw.twctwit.query.RelatedTweetsResource; import net.fortytwo.sesametools.ldserver.GraphResource; import net.fortytwo.sesametools.ldserver.LinkedDataServer; import net.fortytwo.sesametools.ldserver.ServerException; import net.fortytwo.sesametools.ldserver.WebResource; import net.fortytwo.sesametools.ldserver.query.SparqlResource; import net.fortytwo.twitlogic.TwitLogic; import net.fortytwo.twitlogic.persistence.beans.AdministrativeDivision; import net.fortytwo.twitlogic.persistence.beans.Agent; import net.fortytwo.twitlogic.persistence.beans.City; import net.fortytwo.twitlogic.persistence.beans.Country; import net.fortytwo.twitlogic.persistence.beans.Document; import net.fortytwo.twitlogic.persistence.beans.Feature; import net.fortytwo.twitlogic.persistence.beans.Graph; import net.fortytwo.twitlogic.persistence.beans.Image; import net.fortytwo.twitlogic.persistence.beans.MicroblogPost; import net.fortytwo.twitlogic.persistence.beans.Neighborhood; import net.fortytwo.twitlogic.persistence.beans.Point; import net.fortytwo.twitlogic.persistence.beans.PointOfInterest; import net.fortytwo.twitlogic.persistence.beans.SpatialThing; import net.fortytwo.twitlogic.persistence.beans.UserAccount; import net.fortytwo.twitlogic.persistence.sail.AGRepositorySailFactory; import net.fortytwo.twitlogic.persistence.sail.MemoryStoreFactory; import net.fortytwo.twitlogic.persistence.sail.NativeStoreFactory; import net.fortytwo.twitlogic.persistence.sail.Neo4jSailFactory; import net.fortytwo.twitlogic.services.twitter.TwitterClient; import net.fortytwo.twitlogic.util.Factory; import net.fortytwo.twitlogic.util.properties.PropertyException; import net.fortytwo.twitlogic.util.properties.TypedProperties; import org.openrdf.concepts.owl.ObjectProperty; import org.openrdf.concepts.owl.Thing; import org.openrdf.elmo.ElmoManagerFactory; import org.openrdf.elmo.ElmoModule; import org.openrdf.elmo.sesame.SesameManagerFactory; import org.openrdf.model.Resource; import org.openrdf.query.QueryLanguage; import org.openrdf.repository.Repository; import org.openrdf.repository.RepositoryConnection; import org.openrdf.repository.RepositoryException; import org.openrdf.repository.sail.SailRepository; import org.openrdf.rio.RDFFormat; import org.openrdf.rio.RDFHandler; import org.openrdf.rio.RDFHandlerException; import org.openrdf.rio.RDFParseException; import org.openrdf.rio.Rio; import org.openrdf.sail.Sail; import org.openrdf.sail.SailConnectionListener; import org.openrdf.sail.SailException; import org.openrdf.sail.memory.MemoryStore; import org.openrdf.sail.nativerdf.NativeStore; import org.restlet.Component; import org.restlet.data.Protocol; import org.restlet.resource.Directory; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.Set; import java.util.Timer; import java.util.logging.Logger; import java.util.zip.GZIPOutputStream; /** * @author Joshua Shinavier (http://fortytwo.net). */ public class TweetStore { private static final Logger LOGGER = TwitLogic.getLogger(TweetStore.class); private static final int DEFAULT_PORT = 8182; private final Sail sail; private boolean doNotRefreshCoreMetadata = false; private Repository repository; private SesameManagerFactory elmoManagerFactory; private boolean initialized = false; private Factory<SailConnectionListener> sailConnectionListenerFactory; final Set<TweetStoreConnection> openConnections; private TwitterClient twitterClient; private static TweetStore INSTANCE; public static TweetStore getInstance() { return INSTANCE; } /** * The Sesame storage and inference layer (Sail) will be constructed according to configuration properties. * * @throws TweetStoreException if construction fails */ public TweetStore() throws TweetStoreException { this(createSail()); INSTANCE = this; } /** * @param sail a Sesame storage and inference layer */ public TweetStore(final Sail sail) { this.sail = sail; openConnections = Collections.synchronizedSet(new HashSet<TweetStoreConnection>()); } public TwitterClient getTwitterClient() { return twitterClient; } public void initialize() throws TweetStoreException { if (initialized) { throw new IllegalStateException("store has already been initialized"); } LOGGER.info("initializing TwitLogic local store"); repository = new SailRepository(sail); if (!doNotRefreshCoreMetadata) { refreshCoreMetadata(repository); } // Elmo setup. ElmoModule adminElmoModule = new ElmoModule(); adminElmoModule.setGraph(null); // for TwitLogic.AUTHORITATIVE_GRAPH adminElmoModule.addConcept(Thing.class); adminElmoModule.addConcept(ObjectProperty.class); // Dunno why this is necessary, but Elmo logs warnings without it // TwitLogic-specific classes adminElmoModule.addConcept(AdministrativeDivision.class); adminElmoModule.addConcept(Agent.class); adminElmoModule.addConcept(City.class); adminElmoModule.addConcept(Country.class); adminElmoModule.addConcept(Document.class); adminElmoModule.addConcept(Feature.class); adminElmoModule.addConcept(Graph.class); adminElmoModule.addConcept(Image.class); adminElmoModule.addConcept(MicroblogPost.class); adminElmoModule.addConcept(Neighborhood.class); adminElmoModule.addConcept(Point.class); adminElmoModule.addConcept(PointOfInterest.class); adminElmoModule.addConcept(SpatialThing.class); adminElmoModule.addConcept(UserAccount.class); adminElmoModule.addConcept(org.openrdf.concepts.rdfs.Class.class); elmoManagerFactory = new SesameManagerFactory(adminElmoModule, repository); elmoManagerFactory.setQueryLanguage(QueryLanguage.SPARQL); elmoManagerFactory.setInferencingEnabled(false); addPeriodicDump(); initialized = true; } private void addPeriodicDump() throws TweetStoreException { TypedProperties conf = TwitLogic.getConfiguration(); try { File file = conf.getFile(TwitLogic.DUMP_FILE, null); if (null == file) { LOGGER.info("no dump file specified. Periodic data dumps will not be generated."); } else { long interval = conf.getLong(TwitLogic.DUMP_INTERVAL, -1); if (-1 == interval) { LOGGER.warning("no dump interval specified. Periodic data dumps will not be generated."); } else { boolean compressed = false; String s = file.getName(); if (s.endsWith(".gz")) { compressed = true; s = s.substring(0, s.length() - ".gz".length()); } int i = s.lastIndexOf('.'); if (i <= 0) { LOGGER.warning("dump file name could not be parsed. Periodic data dumps will not be generated."); } else { String ext = s.substring(i + 1); RDFFormat format = SesameTools.rdfFormatByExtension(ext); if (null == format) { LOGGER.warning("dump file format not recognized. Periodic data dumps will not be generated."); } else { new Timer().schedule( new DumpFileGeneratorTask(this, file, format, compressed), interval, interval); } } } } } catch (PropertyException e) { throw new TweetStoreException(e); } } public TweetStoreConnection createConnection() throws TweetStoreException { return new TweetStoreConnection(this, sailConnectionListenerFactory); } void notifyClosed(final TweetStoreConnection c) { openConnections.remove(c); } public Sail getSail() { if (!initialized) { throw new IllegalStateException("not yet initialized"); } return sail; } public Repository getRepository() { if (!initialized) { throw new IllegalStateException("not yet initialized"); } return repository; } public ElmoManagerFactory getElmoManagerFactory() { return elmoManagerFactory; } public void shutDown() throws TweetStoreException { if (!initialized) { throw new IllegalStateException("not yet initialized"); } LOGGER.info("shutting down TwitLogic local store"); //new Exception().printStackTrace(); // Note: elmoModule doesn't need to be closed or shutDown. // Make sure all connections are closed before shutting down the Sail. Collection<TweetStoreConnection> cons = new LinkedList<TweetStoreConnection>(); cons.addAll(openConnections); for (TweetStoreConnection c : cons) { c.close(); } LOGGER.info("shutting down triple store"); try { sail.shutDown(); } catch (SailException e) { throw new TweetStoreException(e); } LOGGER.info("done with shutdown"); } //////////////////////////////////////////////////////////////////////////// // convenience methods, may be moved /////////////////////////////////////// public void dump(final OutputStream out) throws RepositoryException, RDFHandlerException { RDFFormat format = RDFFormat.TRIG; LOGGER.info("dumping triple store in format " + format.getName() + " to output stream"); RDFHandler h = Rio.createWriter(format, out); RepositoryConnection rc = getRepository().getConnection(); try { rc.begin(); rc.export(h); } finally { rc.rollback(); rc.close(); } } public void dumpToFile(final File file, final RDFFormat format) throws IOException, RepositoryException, RDFHandlerException { LOGGER.info("dumping triple store in format " + format.getName() + " to file: " + file); OutputStream out = new FileOutputStream(file); try { RDFHandler h = Rio.createWriter(format, out); RepositoryConnection rc = getRepository().getConnection(); try { rc.export(h); } finally { rc.close(); } } finally { out.close(); } } public void dumpToCompressedFile(final File file, final RDFFormat format) throws IOException, RepositoryException, RDFHandlerException { LOGGER.info("dumping compressed triple store in format " + format.getName() + " to file: " + file); OutputStream out = new FileOutputStream(file); try { OutputStream gzipOut = new GZIPOutputStream(out); try { RDFHandler h = Rio.createWriter(format, gzipOut); RepositoryConnection rc = getRepository().getConnection(); try { rc.export(h); } finally { rc.close(); } } finally { gzipOut.close(); } } finally { out.close(); } } public void clear() throws TweetStoreException { try { RepositoryConnection rc = repository.getConnection(); try { rc.begin(); rc.clear(); rc.commit(); } finally { rc.rollback(); rc.close(); } } catch (RepositoryException e) { throw new TweetStoreException(e); } } public void load(final File file, final RDFFormat format) throws TweetStoreException { try { RepositoryConnection rc = repository.getConnection(); try { rc.begin(); try { rc.add(file, "http://example.org/baseURI", format); } catch (IOException e) { throw new TweetStoreException(e); } catch (RDFParseException e) { throw new TweetStoreException(e); } rc.commit(); } finally { rc.rollback(); rc.close(); } } catch (RepositoryException e) { throw new TweetStoreException(e); } } //////////////////////////////////////////////////////////////////////////// public static Sail createSail() throws TweetStoreException { TypedProperties props = TwitLogic.getConfiguration(); String sailType; try { sailType = props.getString(TwitLogic.SAIL_CLASS); } catch (PropertyException e) { throw new TweetStoreException(e); } System.out.println("creating Sail of type: " + sailType); SailFactory factory; if (sailType.equals(MemoryStore.class.getName())) { factory = new MemoryStoreFactory(props); } else if (sailType.equals(NativeStore.class.getName())) { factory = new NativeStoreFactory(props); } else if (sailType.equals("com.knowledgereefsystems.agsail.AllegroSail")) { factory = new AGRepositorySailFactory(props, false); } else if (sailType.equals("com.tinkerpop.blueprints.pgm.oupls.sail.GraphSail")) { factory = new Neo4jSailFactory(props); } else { throw new TweetStoreException("unhandled Sail type: " + sailType); } try { return factory.makeSail(); } catch (SailException e) { throw new TweetStoreException(e); } catch (PropertyException e) { throw new TweetStoreException(e); } } private void refreshCoreMetadata(final Repository repository) throws TweetStoreException { LOGGER.info("adding/refreshing core metadata"); try { RepositoryConnection rc = repository.getConnection(); try { rc.begin(); rc.remove((Resource) null, null, null, TwitLogic.CORE_GRAPH); rc.clearNamespaces(); String baseURI = "http://example.org/baseURI/"; rc.add(TwitLogic.class.getResourceAsStream("namespaces.ttl"), baseURI, RDFFormat.TURTLE, TwitLogic.CORE_GRAPH); rc.add(TwitLogic.class.getResourceAsStream("twitlogic-void.ttl"), baseURI, RDFFormat.TURTLE, TwitLogic.CORE_GRAPH); rc.add(TwitLogic.class.getResourceAsStream("twitterplaces.ttl"), baseURI, RDFFormat.TURTLE, TwitLogic.CORE_GRAPH); rc.commit(); } finally { rc.rollback(); rc.close(); } } catch (IOException e) { throw new TweetStoreException(e); } catch (RDFParseException e) { throw new TweetStoreException(e); } catch (RepositoryException e) { throw new TweetStoreException(e); } } public void doNotRefreshCoreMetadata() { this.doNotRefreshCoreMetadata = true; } public void setSailConnectionListenerFactory(Factory<SailConnectionListener> sailConnectionListenerFactory) { this.sailConnectionListenerFactory = sailConnectionListenerFactory; } public void startServer(final TwitterClient client) throws ServerException { twitterClient = client; try { String internalBaseURI = TwitLogic.getConfiguration().getURI(TwitLogic.SERVER_BASEURI).toString(); String externalBaseURI = TwitLogic.BASE_URI; final String datasetURI = TwitLogic.TWITLOGIC_DATASET; int port = TwitLogic.getConfiguration().getInt(TwitLogic.SERVER_PORT, DEFAULT_PORT); File staticContentDir = TwitLogic.getConfiguration().getFile(TwitLogic.SERVER_STATICCONTENTDIRECTORY); LinkedDataServer server = new LinkedDataServer(this.getSail(), internalBaseURI, externalBaseURI, datasetURI); Component component = new Component(); server.setInboundRoot(component); component.getServers().add(Protocol.HTTP, port); component.getServers().add(Protocol.FILE, port); component.getDefaultHost().attach("/", new Directory(server.getContext(), "file://" + staticContentDir + "/")); for (TwitLogic.ResourceType t : TwitLogic.ResourceType.values()) { String p = t.getUriPath(); if (!p.equals("graph") && !p.equals("person")) { component.getDefaultHost().attach("/" + p + "/", WebResource.class); } } component.getDefaultHost().attach("/person/twitter/", PersonResource.class); component.getDefaultHost().attach("/graph/", GraphResource.class); component.getDefaultHost().attach("/sparql", new SparqlResource()); component.getDefaultHost().attach("/stream/relatedTweets", new RelatedTweetsResource()); component.getDefaultHost().attach("/stream/relatedTags", new RelatedHashtagsResource()); server.start(); } catch (Throwable e) { throw new ServerException(e); } } }